/*
  project=7531865
*/

/*Setting the libraries and calling the data files*/

libname temp69 'path1';
DATA tax69;
   SET temp69.file1 (KEEP=var1);
RUN;

libname temp74 'path2';
DATA tax74;
   SET temp74.file2 (KEEP=var2 var3 var4);
RUN;

libname temp79 'path3';
DATA tax79;
   SET temp79.file3 (KEEP=var5 var6 var4);
RUN;

libname temp84 'path4';
DATA tax84;
   SET temp84.file4 (KEEP=var5 var6 var4);
RUN;

libname temp89 'path5';
DATA tax89;
   SET temp89.file5 (KEEP=var5 var6 var7);
RUN;

libname temp94 'path6';
DATA tax94;
   SET temp94.path6 (KEEP=var5 var6 var7);
RUN;


/*Compiling the summary statistics

NOTES:
 - Limited Spouse PIKs to records with a filing status of Married, filing jointly or Married, filing separately.
 - There are records with other filing statuses that have Spouse PIKs, but these do not seem to be consistently applied.

*/

PROC SQL;
SELECT 
'1969' AS TaxYear,
COUNT(*) AS TotalRecords,
COUNT(var1) AS FilerPIKs,
COUNT(var1)*100/COUNT(*) AS PercentRecordsPIKed,
'' AS SpousePIKs,
'' AS RecordsWithMarriedFilingStatus,
'' AS PercentMarriedFilingStatusPIKed
FROM tax69

UNION ALL

SELECT 
'1974' AS TaxYear,
COUNT(*) AS TotalRecords,
COUNT(var2) AS FilerPIKs,
COUNT(var2)*100/COUNT(*) AS PercentRecordsPIKed,
SUM(CASE WHEN var3 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END) AS SpousePIKs,
SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS RecordsWithMarriedFilingStatus,
SUM(CASE WHEN var3 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END)*100/SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS PercentMarriedFilingStatusPIKed
FROM tax74

UNION ALL

SELECT 
'1979' AS TaxYear,
COUNT(*) AS TotalRecords,
COUNT(var5) AS FilerPIKs,
COUNT(var5)*100/COUNT(*) AS PercentRecordsPIKed,
SUM(CASE WHEN var6 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END) AS SpousePIKs,
SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS RecordsWithMarriedFilingStatus,
SUM(CASE WHEN var6 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END)*100/SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS PercentMarriedFilingStatusPIKed
FROM tax79

UNION ALL

SELECT 
'1984' AS TaxYear,
COUNT(*) AS TotalRecords,
COUNT(var5) AS FilerPIKs,
COUNT(var5)*100/COUNT(*) AS PercentRecordsPIKed,
SUM(CASE WHEN var6 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END) AS SpousePIKs,
SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS RecordsWithMarriedFilingStatus,
SUM(CASE WHEN var6 IS NOT NULL AND (var4 = 'value1' OR var4 = 'value2') THEN 1 ELSE 0 END)*100/SUM(CASE WHEN var4 = 'value1' OR var4 = 'value2' THEN 1 ELSE 0 END) AS PercentMarriedFilingStatusPIKed
FROM tax84

UNION ALL

SELECT 
'1989' AS TaxYear,
COUNT(*) AS TotalRecords,
COUNT(var5) AS FilerPIKs,
COUNT(var5)*100/COUNT(*) AS PercentRecordsPIKed,
SUM(CASE WHEN var6 IS NOT NULL AND (var7 = 'value1' OR var7 = 'value2') THEN 1 ELSE 0 END) AS SpousePIKs,
SUM(CASE WHEN var7 = 'value1' OR var7 = 'value2' THEN 1 ELSE 0 END) AS RecordsWithMarriedFilingStatus,
SUM(CASE WHEN var6 IS NOT NULL AND (var7 = 'value1' OR var7 = 'value2') THEN 1 ELSE 0 END)*100/SUM(CASE WHEN var7 = 'value1' OR var7 = 'value2' THEN 1 ELSE 0 END) AS PercentMarriedFilingStatusPIKed
FROM tax89

UNION ALL

SELECT 
'1994' AS TaxYear,
COUNT(*) AS TotalRecords FORMAT=COMMA20.,
COUNT(var5) AS FilerPIKs FORMAT=COMMA20.,
COUNT(var5)*100/COUNT(*) AS PercentRecordsPIKed,
SUM(CASE WHEN var6 IS NOT NULL AND (var7 = 'value1' OR var7 = 'value2') THEN 1 ELSE 0 END) AS SpousePIKs,
SUM(CASE WHEN var7 = 'value1' OR var7 = 'value2' THEN 1 ELSE 0 END) AS RecordsWithMarriedFilingStatus,
SUM(CASE WHEN var6 IS NOT NULL AND (var7 = 'value1' OR var7 = 'value2') THEN 1 ELSE 0 END)*100/SUM(CASE WHEN var7 = 'value1' OR var7 = 'value2' THEN 1 ELSE 0 END) AS PercentMarriedFilingStatusPIKed
FROM tax94;